# Regression results from the MIT data NOT present while building modules 
load(paste0(work_dir, "replication_analysis_filt/MIT_res_test_ast.Rdata"))
stats_ast = res_test$all_stats_df
stats_ast$network = "ast"

load(paste0(work_dir, "replication_analysis_filt/MIT_res_test_mic.Rdata"))
stats_mic = res_test$all_stats_df
stats_mic$network = "mic"

load(paste0(work_dir, "replication_analysis_filt/MIT_res_test_ext.Rdata"))
stats_ext = res_test$all_stats_df
stats_ext$network = "ext"

load(paste0(work_dir, "replication_analysis_filt/MIT_res_test_inh.Rdata"))
stats_inh = res_test$all_stats_df
stats_inh$network = "inh"

load(paste0(work_dir, "replication_analysis_filt/MIT_res_test_oli.Rdata"))
stats_oli = res_test$all_stats_df
stats_oli$network = "oli"

load(paste0(work_dir, "replication_analysis_filt/MIT_res_test_end.Rdata"))
stats_end = res_test$all_stats_df
stats_end$network = "end"

load(paste0(work_dir, "replication_analysis_filt/MIT_res_test_opc.Rdata"))
stats_opc = res_test$all_stats_df
stats_opc$network = "opc"

all_stats = rbind(stats_ast, stats_mic, stats_ext, stats_inh, stats_oli, stats_end, stats_opc)
save(all_stats, file = paste0(work_dir, "replication_analysis_filt/all_res_test_stats_sn_MIT_filt.Rdata"))

All stats

load(paste0(work_dir, "replication_analysis_filt/all_res_test_stats_sn_MIT_filt.Rdata"))
createDT(all_stats %>% arrange(nom_p))

Counting modules

# Sn modules built from the Columbia 424 samples 
modules_reg_results = read_tsv(paste0(reg_modules_dir, "snModules_ADtraits_results.tsv.gz"))
# replication using MIT data (independent ROSMAP donors)
load(paste0(work_dir, "replication_analysis_filt/all_res_test_stats_sn_MIT_filt.Rdata"))

modules_trait_associ_repl = modules_reg_results %>% left_join(all_stats, by = c("phenotype","module","network"), suffix = c("_columbia","_mit")) %>%
  # filter(phenotype %in% c("amyloid_sqrt","tangles_sqrt","cogng_demog_slope")) %>%
  select(phenotype, module, network, nom_p_columbia, nom_p_mit, FDR, module2) %>%
  arrange(FDR) %>%
  mutate(replicated = FDR <= 0.05 & nom_p_mit <= 0.05)

# How many modules replicated?
bind_rows(modules_trait_associ_repl %>% filter(FDR <= 0.05) %>% summarise(n_discovery = n(), n_replication = sum(replicated)) %>%
  mutate(replication_rate = n_replication/n_discovery, threshold = 0.05),
modules_trait_associ_repl %>% filter(FDR <= 0.01) %>% summarise(n_discovery = n(), n_replication = sum(replicated)) %>%
  mutate(replication_rate = n_replication/n_discovery, threshold = 0.01),
modules_trait_associ_repl %>% filter(FDR <= 0.005) %>% summarise(n_discovery = n(), n_replication = sum(replicated)) %>%
  mutate(replication_rate = n_replication/n_discovery, threshold = 0.005),
modules_trait_associ_repl %>% filter(FDR <= 0.001) %>% summarise(n_discovery = n(), n_replication = sum(replicated)) %>%
  mutate(replication_rate = n_replication/n_discovery, threshold = 0.001)) %>%
  mutate(threshold = factor(threshold, levels = c(0.05,0.01,0.005,0.001))) %>%
  ggplot(aes(x = threshold, y = replication_rate)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = paste0(round(replication_rate*100, 1), "%\nof ",n_discovery)), vjust = 1.5, color = "white") +
  # scale_x_continuous(breaks = c(0.001, 0.005, 0.01, 0.05), labels = c("0.001", "0.005", "0.01", "0.05")) +
  labs(x = "FDR threshold", y = "Replication rate", title = "Replication of module-trait associations") +
  theme_minimal() -> plot_a

fdr_p_line = max(modules_trait_associ_repl$nom_p_columbia[modules_trait_associ_repl$FDR<0.05])
# compare the nominal p-values from the Columbia and MIT data (scater plot)
ggplot(modules_trait_associ_repl, aes(y = -log10(nom_p_columbia), x = -log10(nom_p_mit))) +
  geom_point(aes(color = replicated)) +
  # geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  scale_color_manual(values = c("grey", "red")) +
  geom_smooth(method='lm') +
  lims(y = c(0, 10)) +
  ggpubr::stat_cor(label.x.npc = .7, label.y.npc = 1) +
  geom_vline(xintercept = -log10(0.05), linetype = "dashed") +
  annotate("text", y=Inf, x=-log10(0.05), label=paste0("nom P < 5%"), size=3, angle=-90, vjust=-0.4, hjust=0) +
  geom_hline(yintercept = -log10(fdr_p_line), linetype = "dashed") +
  annotate("text", x=0, y=-log10(fdr_p_line), label=paste0("FDR < 5%"), size=3, angle=0, vjust=-0.4, hjust=0) +
  ggrepel::geom_text_repel(data = modules_trait_associ_repl %>% filter(replicated), aes(label = module2), size = 3.5, show.legend = F) +
  labs(y = "-log10 nominal p-value (Columbia)", x = "-log10 nominal p-value (MIT)", color = "Replicated") +
  theme_minimal() -> plot_b

# pdf(file = paste0(work_dir, "replication_analysis_filt/replication_MIT_ind.pdf"), width = 12, height = 5)
ggpubr::ggarrange(plot_a, plot_b, ncol = 2, widths = c(1, 2), labels = c("a)", "b)"))

# dev.off()
modules_trait_associ_repl = modules_reg_results[modules_reg_results$phenotype %in% c("amyloid_sqrt", "tangles_sqrt", "cogng_demog_slope"), ] %>% left_join(all_stats, by = c("phenotype","module","network"), suffix = c("_columbia","_mit")) %>%
  select(phenotype, module, network, nom_p_columbia, nom_p_mit, tstats_columbia, tstats_mit, FDR, module2) %>%
  arrange(FDR) %>%
  mutate(replicated = FDR <= 0.05 & nom_p_mit <= 0.05)

modules_trait_associ_repl$phenotype[modules_trait_associ_repl$phenotype == "amyloid_sqrt"] <- "Amyloid"
modules_trait_associ_repl$phenotype[modules_trait_associ_repl$phenotype == "tangles_sqrt"] <- "Tangles"
modules_trait_associ_repl$phenotype[modules_trait_associ_repl$phenotype == "cogng_demog_slope"] <- "Cognition"

# pdf(file = paste0(work_dir, "replication_analysis_filt/replication_MIT_pheno.pdf"), width = 12, height = 4)
ggplot(modules_trait_associ_repl #%>% filter(nom_p_columbia <= 0.05)
         ,aes(y = tstats_columbia, x = tstats_mit)) +
  geom_point(aes(color = replicated)) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  scale_color_manual(values = c("grey", "red")) +
  geom_smooth(method='lm') +
  # lims(y = c(0, 10)) +
  ggpubr::stat_cor() +
  ggrepel::geom_text_repel(data = modules_trait_associ_repl %>% filter(replicated), aes(label = module2), size = 3.5, show.legend = F) +
  labs(y = "t-statistics (Columbia)", x = "t-statistics (MIT)", color = "Replicated") +
  theme_minimal() +
  # coord_fixed() +
  facet_wrap(~phenotype, nrow = 1) 

# dev.off()

Session

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.9.3 forcats_1.0.0   stringr_1.5.1   dplyr_1.1.4    
##  [5] purrr_1.0.2     readr_2.1.5     tidyr_1.3.1     tibble_3.2.1   
##  [9] ggplot2_3.5.0   tidyverse_2.0.0 ggeasy_0.1.3   
## 
## loaded via a namespace (and not attached):
##  [1] ggrepel_0.9.5     Rcpp_1.0.12       lattice_0.20-45   digest_0.6.35    
##  [5] utf8_1.2.4        R6_2.5.1          backports_1.4.1   evaluate_0.23    
##  [9] highr_0.10        pillar_1.9.0      rlang_1.1.3       rstudioapi_0.16.0
## [13] car_3.1-2         jquerylib_0.1.4   Matrix_1.6-1.1    DT_0.30          
## [17] rmarkdown_2.26    splines_4.1.2     labeling_0.4.3    htmlwidgets_1.6.4
## [21] bit_4.0.5         munsell_0.5.1     broom_1.0.5       compiler_4.1.2   
## [25] xfun_0.43         pkgconfig_2.0.3   mgcv_1.8-38       htmltools_0.5.8.1
## [29] tidyselect_1.2.1  fansi_1.0.6       crayon_1.5.2      tzdb_0.4.0       
## [33] withr_3.0.0       ggpubr_0.6.0      grid_4.1.2        nlme_3.1-153     
## [37] jsonlite_1.8.8    gtable_0.3.4      lifecycle_1.0.4   magrittr_2.0.3   
## [41] scales_1.3.0      cli_3.6.2         stringi_1.8.3     vroom_1.6.5      
## [45] cachem_1.0.8      carData_3.0-5     farver_2.1.1      ggsignif_0.6.4   
## [49] bslib_0.7.0       generics_0.1.3    vctrs_0.6.5       cowplot_1.1.3    
## [53] tools_4.1.2       bit64_4.0.5       glue_1.7.0        hms_1.1.3        
## [57] crosstalk_1.2.1   abind_1.4-5       parallel_4.1.2    fastmap_1.1.1    
## [61] yaml_2.3.8        timechange_0.3.0  colorspace_2.1-0  rstatix_0.7.2    
## [65] knitr_1.46        sass_0.4.9